{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import steward as st\n",
    "import matplotlib.pyplot as plt\n",
    "import pickle\n",
    "import xgboost\n",
    "from sklearn.metrics import roc_auc_score\n",
    "%matplotlib inline\n",
    "from src import build\n",
    "from src import train\n",
    "from src.feature_cols import to_drop\n",
    "import copy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "build.build_all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "feature_list = [\n",
    "#     'basic_preprocess/cont_r_train_30W',\n",
    "\n",
    "    'basic_preprocess/cont_train_30W',\n",
    "    'basic_preprocess/conc_train_30W',\n",
    "    \n",
    "#     'index/index_train_30W'\n",
    "    \n",
    "    'feature/rank1_train_30W',\n",
    "    'feature/rank2_train_30W',\n",
    "    'feature/rank3_train_30W',\n",
    "    \n",
    "    'feature/history1_train_30W',\n",
    "    'feature/history2_train_30W',\n",
    "    'feature/history3_train_30W',\n",
    "\n",
    "    'feature/ordnum_train_30W',\n",
    "    'feature/room1_train_30W',\n",
    "    'feature/basicroom1_train_30W',\n",
    "    'feature/orderroom1_train_30W',\n",
    "    \n",
    "    'feature/rank_history1_train_30W',\n",
    "    'feature/rank_history2_train_30W',\n",
    "    'feature/rank_history3_train_30W',\n",
    "    \n",
    "#     'feature/room1head5_train_30W',\n",
    "    'feature/room1small5_train_30W',\n",
    "    'feature/room1large5_train_30W',\n",
    "#     'feature/orderroom1head5_train_30W',\n",
    "    'feature/orderroom1small5_train_30W',\n",
    "    'feature/orderroom1large5_train_30W',\n",
    "\n",
    "    'feature/custom1_train_30W',\n",
    "    'feature/roomcount_date_train_30W',\n",
    "]\n",
    "\n",
    "y = 'basic_preprocess/y_train_30W'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "__x = []\n",
    "for name in feature_list:\n",
    "    __x.append(st.get_instance(name).load())\n",
    "\n",
    "y_df = st.get_instance(y).load()\n",
    "\n",
    "X_df = pd.concat(__x, copy=False, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "t = st.LoadInstance('test/real_all_without_head_30W')\n",
    "t()\n",
    "im = t.df['im']\n",
    "result = t.df['result']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "result = pd.DataFrame(result, columns=['n', 'iter_n', 'score'])\n",
    "top_cols = im.head(395).index.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dropped_col: dense_rank_order_basicroom_price_last_cont14_reverse_norm\n",
      "best_iter: 294, best_score: 0.478597\n",
      "dropped_col: large5_order_group_mean_diff_roomtag_3\n",
      "best_iter: 188, best_score: 0.477050\n",
      "dropped_col: dense_rank_orderid_basicroomrank_basic_comment_ratio_norm\n",
      "best_iter: 93, best_score: 0.469314\n",
      "dropped_col: basicroom_room_agg_sum_roomtag_2\n",
      "best_iter: 215, best_score: 0.476019\n",
      "dropped_col: roomservice_3_islast\n",
      "best_iter: 143, best_score: 0.474471\n",
      "dropped_col: max_rank_order_basicroom_roomtag_3\n",
      "best_iter: 159, best_score: 0.476534\n",
      "dropped_col: 1month_cont6\n",
      "best_iter: 235, best_score: 0.476534\n",
      "dropped_col: roomservice_3_fit_num_1month\n",
      "best_iter: 199, best_score: 0.478081\n",
      "dropped_col: rank\n",
      "best_iter: 91, best_score: 0.470861\n",
      "dropped_col: 3month_cont3\n",
      "best_iter: 81, best_score: 0.470346\n",
      "dropped_col: ordertype_10_ratio\n",
      "best_iter: 282, best_score: 0.483755\n",
      "dropped_col: average_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse\n",
      "best_iter: 200, best_score: 0.477050\n",
      "dropped_col: dense_rank_orderid_basicroomrank_basic_30days_realratio_reverse\n",
      "best_iter: 112, best_score: 0.473440\n",
      "dropped_col: ordertype_8_ratio\n",
      "best_iter: 158, best_score: 0.478081\n",
      "dropped_col: min_rank_orderby_order_room_rankby_room_returnvalue_reverse_norm\n",
      "best_iter: 268, best_score: 0.477050\n",
      "dropped_col: average_rank_order_basicroom_room_30days_realratio_reverse_norm\n",
      "best_iter: 183, best_score: 0.474987\n",
      "dropped_col: user_roomservice_4_2ratio\n",
      "best_iter: 227, best_score: 0.476534\n",
      "dropped_col: average_rank_orderid_basicroomrank_basic_30days_ordnumratio_reverse_norm\n",
      "best_iter: 55, best_score: 0.466220\n",
      "dropped_col: dense_rank_order_basicroom_roomtag_3_norm\n",
      "best_iter: 235, best_score: 0.481176\n",
      "dropped_col: min_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse\n",
      "best_iter: 234, best_score: 0.474987\n",
      "dropped_col: dense_rank_orderby_order_room_rankby_room_roomtag_2_norm\n",
      "best_iter: 154, best_score: 0.470346\n",
      "dropped_col: dense_rank_order_basicroom_3month_cont7_norm\n",
      "best_iter: 205, best_score: 0.477050\n",
      "dropped_col: max_rank_orderid_basicroomrank_basic_minarea\n",
      "best_iter: 165, best_score: 0.478597\n",
      "dropped_col: user_roomservice_2_1ratio\n",
      "best_iter: 62, best_score: 0.469314\n",
      "dropped_col: min_rank_order_basicroom_room_30days_ordnumratio_reverse_norm\n",
      "best_iter: 217, best_score: 0.476534\n",
      "dropped_col: user_roomservice_8_1ratio_num\n",
      "best_iter: 148, best_score: 0.470861\n",
      "dropped_col: min_rank_order_basicroom_fit_room_serivice2_ratio_reverse\n",
      "best_iter: 144, best_score: 0.479113\n",
      "dropped_col: basicroom_room_agg_sum_roomtag_3\n",
      "best_iter: 119, best_score: 0.472924\n",
      "dropped_col: min_rank_orderby_order_room_rankby_room_roomtag_3_norm\n",
      "best_iter: 254, best_score: 0.478597\n",
      "dropped_col: min_rank_order_basicroom_fit_room_serivice4_ratio_norm\n",
      "best_iter: 228, best_score: 0.475503\n",
      "dropped_col: max_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse\n",
      "best_iter: 170, best_score: 0.474471\n",
      "dropped_col: star_diff_last\n",
      "best_iter: 281, best_score: 0.481692\n",
      "dropped_col: user_minprice_1month\n",
      "best_iter: 98, best_score: 0.472924\n",
      "dropped_col: user_citynum\n",
      "best_iter: 132, best_score: 0.477050\n",
      "dropped_col: basicroom_room_agg_median_roomtag_3\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-7-a68afa2f8542>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     13\u001b[0m     \u001b[0mfeature_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mselect_cols\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     14\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'dropped_col: %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mdropped_col\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m     \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_xgboost_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeature_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mearly_stopping_rounds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_estimators\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_para\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmodel_para\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     16\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'best_iter: %d, best_score: %f'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_iteration\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m     \u001b[0mdrop_result\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdropped_col\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_iteration\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/project/ctrip/src/train.py\u001b[0m in \u001b[0;36mtrain_xgboost_step\u001b[0;34m(X_df, y_df, use_model, model_para, kfold_k, sample_weight, use_piece, early_stopping_rounds, n_estimators, verbose, raw)\u001b[0m\n\u001b[1;32m     94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     95\u001b[0m     \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0muse_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mmodel_para\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 96\u001b[0;31m     \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meval_set\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meval_metric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0meval_metric\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mearly_stopping_rounds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mearly_stopping_rounds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     97\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/xgboost/sklearn.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight, eval_set, eval_metric, early_stopping_rounds, verbose)\u001b[0m\n\u001b[1;32m    443\u001b[0m                               \u001b[0mearly_stopping_rounds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mearly_stopping_rounds\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    444\u001b[0m                               \u001b[0mevals_result\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mevals_result\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeval\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeval\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 445\u001b[0;31m                               verbose_eval=verbose)\n\u001b[0m\u001b[1;32m    446\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    447\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobjective\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxgb_options\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"objective\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/xgboost/training.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, learning_rates, xgb_model, callbacks)\u001b[0m\n\u001b[1;32m    203\u001b[0m                            \u001b[0mevals\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mevals\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    204\u001b[0m                            \u001b[0mobj\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeval\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeval\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 205\u001b[0;31m                            xgb_model=xgb_model, callbacks=callbacks)\n\u001b[0m\u001b[1;32m    206\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    207\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/xgboost/training.py\u001b[0m in \u001b[0;36m_train_internal\u001b[0;34m(params, dtrain, num_boost_round, evals, obj, feval, xgb_model, callbacks)\u001b[0m\n\u001b[1;32m     84\u001b[0m         \u001b[0;31m# check evaluation result.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     85\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevals\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m             \u001b[0mbst_eval_set\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbst\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval_set\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     87\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbst_eval_set\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSTRING_TYPES\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     88\u001b[0m                 \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbst_eval_set\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/xgboost/core.py\u001b[0m in \u001b[0;36meval_set\u001b[0;34m(self, evals, iteration, feval)\u001b[0m\n\u001b[1;32m    870\u001b[0m             \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'[%d]'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0miteration\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    871\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mdmat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mevals\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 872\u001b[0;31m                 \u001b[0mfeval_ret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdmat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdmat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    873\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeval_ret\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    874\u001b[0m                     \u001b[0;32mfor\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfeval_ret\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/project/ctrip/src/train.py\u001b[0m in \u001b[0;36meval_metric\u001b[0;34m(y_pred, y_true)\u001b[0m\n\u001b[1;32m     87\u001b[0m         \u001b[0my_true\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_label\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     88\u001b[0m         \u001b[0mtable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'pred_proba'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 89\u001b[0;31m         \u001b[0mpred_every_order\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'orderid'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpred_proba\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpred_proba\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'orderlabel'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     90\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0;34m'V'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpred_every_order\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mpred_every_order\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mN\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m    714\u001b[0m         \u001b[0;31m# ignore SettingWithCopy here in case the user mutates\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    715\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0moption_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'mode.chained_assignment'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 716\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_python_apply_general\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    717\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    718\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_python_apply_general\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py\u001b[0m in \u001b[0;36m_python_apply_general\u001b[0;34m(self, f)\u001b[0m\n\u001b[1;32m    718\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_python_apply_general\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    719\u001b[0m         keys, values, mutated = self.grouper.apply(f, self._selected_obj,\n\u001b[0;32m--> 720\u001b[0;31m                                                    self.axis)\n\u001b[0m\u001b[1;32m    721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    722\u001b[0m         return self._wrap_applied_output(\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, f, data, axis)\u001b[0m\n\u001b[1;32m   1711\u001b[0m                 hasattr(splitter, 'fast_apply') and axis == 0):\n\u001b[1;32m   1712\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1713\u001b[0;31m                 \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmutated\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msplitter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfast_apply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroup_keys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1714\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mgroup_keys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmutated\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1715\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mInvalidApply\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py\u001b[0m in \u001b[0;36mfast_apply\u001b[0;34m(self, f, names)\u001b[0m\n\u001b[1;32m   4381\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4382\u001b[0m         \u001b[0msdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_sorted_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4383\u001b[0;31m         \u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmutated\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_frame_axis0\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstarts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mends\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4384\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4385\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmutated\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/src/reduce.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.apply_frame_axis0 (pandas/_libs/lib.c:41912)\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/home/shan/project/ctrip/src/train.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m     87\u001b[0m         \u001b[0my_true\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_label\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     88\u001b[0m         \u001b[0mtable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'pred_proba'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 89\u001b[0;31m         \u001b[0mpred_every_order\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'orderid'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpred_proba\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpred_proba\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'orderlabel'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     90\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0;34m'V'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpred_every_order\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mpred_every_order\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mN\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    599\u001b[0m         \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    600\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 601\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    602\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    603\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m   2426\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2427\u001b[0m             return self._engine.get_value(s, k,\n\u001b[0;32m-> 2428\u001b[0;31m                                           tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[1;32m   2429\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2430\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minferred_type\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'integer'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'boolean'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mdtype\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    332\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    333\u001b[0m         \u001b[0;34m\"\"\" return the dtype object of the underlying data \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 334\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    335\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    336\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mdtype\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   4194\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4195\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4196\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_block\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4197\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4198\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# 每次drop一个\n",
    "drop_result = []\n",
    "model_para = {\n",
    "    'objective': 'rank:pairwise',\n",
    "    'n_estimators': 1000,\n",
    "#     'scale_pos_weight': 5,\n",
    "    'learning_rate': 0.3,\n",
    "    'max_depth':4,\n",
    "}\n",
    "for i in range(len(top_cols)-1, -1, -1):\n",
    "    select_cols = top_cols[0:i] + top_cols[i+1:] # 每次去掉一个\n",
    "    dropped_col = top_cols[i]\n",
    "    feature_df = X_df[select_cols]\n",
    "    print('dropped_col: %s' % dropped_col)\n",
    "    r = train.train_xgboost_step(feature_df, y_df, early_stopping_rounds=50, n_estimators=1000, model_para=model_para, verbose=False)\n",
    "    print('best_iter: %d, best_score: %f' % (r.best_iteration, -r.best_score))\n",
    "    drop_result.append([dropped_col, r.best_iteration, -r.best_score])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "result = pd.DataFrame(drop_result, columns=['col', 'n_iter', 'score'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "node = st.SaveInstance(result, name='test/drop395result')\n",
    "node()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model_para = {\n",
    "    'objective': 'rank:pairwise',\n",
    "    'n_estimators': iter_n,\n",
    "#     'scale_pos_weight': 5,\n",
    "    'learning_rate': 0.3,\n",
    "    'max_depth':4\n",
    "}\n",
    "train.train_all(feature_df, y_df, model_para=model_para)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_cols_index(df, col_name_list):\n",
    "    index = []\n",
    "    assert (df.columns.duplicated() == False).all()\n",
    "    for col in col_name_list:\n",
    "        t = (df.columns == col).nonzero()[0][0]\n",
    "        assert t>=0, t\n",
    "        index.append(t)\n",
    "    return index\n",
    "get_cols_index(X_df, im.head(256).index.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "im.head(256).index.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "get_cols_index(X_df, im.head(256).index.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(im.head(256).index.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
